//
//  $Id: NSString_FJNStringEncoding.m 84 2008-03-18 13:25:05Z fujidana $
//
//  Copyright (c) 2006-2008 FUJIDANA. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. Neither the name of the author nor the names of its contributors
//    may be used to endorse or promote products derived from this software
//    without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//

#import "NSString_FJNStringEncoding.h"
#import <string.h>


// Private helpers used by the public FJNStringEncoding initializers.
// They are declared here so the public methods, which are implemented
// earlier in this file, can call them.
@interface NSString (FJNStringEncoding_Private)

// Looks for the first ESC (0x1b) byte in `data`; when it is immediately
// followed by '$' or '(' the data is decoded as NSISO2022JPStringEncoding.
// On success the encoding is stored in *enc (if enc is not NULL).
// Returns nil, and stores an NSFileReadInapplicableStringEncodingError in
// *outError (if not NULL), when no such sequence is found or decoding fails.
- (id)initWithDataOfISO2022Format:(NSData *)data usedEncoding:(NSStringEncoding *)enc error:(NSError **)outError;

// Tries a fixed list of string encodings in order and returns the string
// produced by the first one that can decode `data`, storing that encoding in
// *enc (if enc is not NULL). Returns nil, and stores an
// NSFileReadInapplicableStringEncodingError in *outError (if not NULL), when
// none of the encodings applies.
- (id)initWithDataBySequentialGuess:(NSData *)data usedEncoding:(NSStringEncoding *)enc error:(NSError **)outError;

@end


@implementation NSString (FJNStringEncoding)

// Builds the (autoreleased) error reported whenever data cannot be decoded
// with the encoding(s) a method is responsible for.
static NSError *FJNInapplicableStringEncodingError(void)
{
	return [NSError errorWithDomain:NSCocoaErrorDomain
							   code:NSFileReadInapplicableStringEncodingError
						   userInfo:nil];
}

#pragma mark Public methods

// Convenience constructor: returns an autoreleased string read from the file
// at `path`, guessing its (Japanese) encoding.
// See -initWithContentsOfURL:usedJapaneseEncoding:error: for details.
+ (id)stringWithContentsOfFile:(NSString *)path usedJapaneseEncoding:(NSStringEncoding *)enc error:(NSError **)outError
{
	NSURL *url = [NSURL fileURLWithPath:path];
	return [[[self alloc] initWithContentsOfURL:url usedJapaneseEncoding:enc error:outError] autorelease];
}

// Convenience constructor: returns an autoreleased string read from `url`,
// guessing its (Japanese) encoding.
// See -initWithContentsOfURL:usedJapaneseEncoding:error: for details.
+ (id)stringWithContentsOfURL:(NSURL *)url usedJapaneseEncoding:(NSStringEncoding *)enc error:(NSError **)outError
{
	return [[[self alloc] initWithContentsOfURL:url usedJapaneseEncoding:enc error:outError] autorelease];
}

// Same as -initWithContentsOfURL:usedJapaneseEncoding:error:, taking a file
// path instead of a URL.
- (id)initWithContentsOfFile:(NSString *)path usedJapaneseEncoding:(NSStringEncoding *)enc error:(NSError **)outError
{
	NSURL *url = [NSURL fileURLWithPath:path];
	return [self initWithContentsOfURL:url usedJapaneseEncoding:enc error:outError];
}

// Loads the contents of `url` and decodes it, guessing the string encoding.
//
// Three strategies are tried in order:
//   1. ISO-2022-JP detection by escape sequence,
//   2. Cocoa's own -initWithContentsOfURL:usedEncoding:error:,
//   3. a sequential trial of a fixed list of encodings.
//
// enc      - if not NULL, receives the encoding that was used on success.
// outError - if not NULL, receives an error when nil is returned. It is not
//            written by the intermediate strategies, so a successful call
//            does not store a stale failure in it.
// Returns the decoded string, or nil if the data could not be loaded or
// decoded.
- (id)initWithContentsOfURL:(NSURL *)url usedJapaneseEncoding:(NSStringEncoding *)enc error:(NSError **)outError
{
	// The receiver is discarded right away; every strategy below gets its own
	// freshly allocated instance, because a failed init releases its receiver.
	Class aClass = [self class];
	NSZone *zone = [self zone];
	[self release];
	
	// Load data
	NSData *data = [NSData dataWithContentsOfURL:url options:0 error:outError];
	if (data == nil) return nil;
	
	// First, check whether the string encoding is ISO-2022 or not.
	// (This has to come first because initWithContentsOfURL:usedEncoding:error:
	// in the second step mistakes ISO-2022-JP for Unicode.)
	self = [[aClass allocWithZone:zone] initWithDataOfISO2022Format:data usedEncoding:enc error:NULL];
	if (self != nil) return self;
	
	// Second, use the Cocoa API (initWithContentsOfURL:usedEncoding:error:).
	// It seems that a string encoded in Unicode with BOM can be decoded well;
	// the other encodings seem to be undetectable.
	self = [[aClass allocWithZone:zone] initWithContentsOfURL:url usedEncoding:enc error:NULL];
	if (self != nil) return self;
	
	// Third, try to interpret with encodings in turn. This is the last resort,
	// so its error (if any) is the one reported to the caller.
	return [[aClass allocWithZone:zone] initWithDataBySequentialGuess:data usedEncoding:enc error:outError];
}

// Decodes `data`, guessing the string encoding.
//
// ISO-2022-JP detection by escape sequence is tried first, then a sequential
// trial of a fixed list of encodings.
//
// enc      - if not NULL, receives the encoding that was used on success.
// outError - if not NULL, receives an error when nil is returned; it is not
//            written when decoding succeeds.
// Returns the decoded string, or nil if no encoding applied.
- (id)initWithData:(NSData *)data usedJapaneseEncoding:(NSStringEncoding *)enc error:(NSError **)outError
{
	// See -initWithContentsOfURL:usedJapaneseEncoding:error: for why the
	// receiver is released and re-allocated for each strategy.
	Class aClass = [self class];
	NSZone *zone = [self zone];
	[self release];
	
	// Check ISO-2022-JP or not
	self = [[aClass allocWithZone:zone] initWithDataOfISO2022Format:data usedEncoding:enc error:NULL];
	if (self != nil) return self;
	
	// Check encodings in turn; its error (if any) is reported to the caller.
	return [[aClass allocWithZone:zone] initWithDataBySequentialGuess:data usedEncoding:enc error:outError];
}

#pragma mark Private methods

// Decodes `data` as ISO-2022-JP if it looks like ISO-2022-JP.
//
// The data is considered ISO-2022-JP when its first ESC (0x1b) byte is
// immediately followed by '$' or '('. Only the first ESC byte is inspected.
//
// enc      - if not NULL, receives NSISO2022JPStringEncoding on success.
// outError - if not NULL, receives NSFileReadInapplicableStringEncodingError
//            when nil is returned.
// Returns the decoded string, or nil (with the receiver released) when no
// such escape sequence is found or the data cannot be decoded.
- (id)initWithDataOfISO2022Format:(NSData *)data usedEncoding:(NSStringEncoding *)enc error:(NSError **)outError
{
	const char *bytes = [data bytes];
	size_t length = [data length];
	BOOL looksLikeISO2022 = NO;
	
	// An escape sequence needs at least two bytes. Checking this first also
	// keeps "length - 1" from wrapping around for empty (or nil) data, which
	// would otherwise let memchr() run far past the end of the buffer.
	if (bytes != NULL && length >= 2)
	{
		// Search only the first length - 1 bytes so that the byte following
		// a found ESC is always inside the buffer.
		const char *escape = memchr(bytes, 0x1b, length - 1); // ESC == 0x1b
		if (escape != NULL && (escape[1] == '$' || escape[1] == '('))
		{
			looksLikeISO2022 = YES;
		}
	}
	
	if (!looksLikeISO2022)
	{
		// If the escape sequence cannot be found, release self and return nil.
		if (outError != NULL) *outError = FJNInapplicableStringEncodingError();
		[self release];
		return nil;
	}
	
	self = [self initWithData:data encoding:NSISO2022JPStringEncoding];
	if (self == nil)
	{
		// The data cannot be interpreted with ISO-2022-JP encoding after all.
		// (The failed init has already disposed of the receiver.)
		if (outError != NULL) *outError = FJNInapplicableStringEncodingError();
		return nil;
	}
	
	if (enc != NULL) *enc = NSISO2022JPStringEncoding;
	return self;
}

// Decodes `data` with the first encoding of a fixed candidate list that
// accepts it.
//
// enc      - if not NULL, receives the encoding that succeeded.
// outError - if not NULL, receives NSFileReadInapplicableStringEncodingError
//            when nil is returned.
// Returns the decoded string, or nil when no candidate encoding applies.
- (id)initWithDataBySequentialGuess:(NSData *)data usedEncoding:(NSStringEncoding *)enc error:(NSError **)outError
{
	// Each trial needs its own instance (a failed init releases its
	// receiver), so the original receiver is dropped up front.
	Class aClass = [self class];
	NSZone *zone = [self zone];
	[self release];
	
	// Encodings are tried in this order.
	// You can reorder this list if necessary.
	const NSStringEncoding candidates[] =
	{
		NSNonLossyASCIIStringEncoding,
		NSUTF8StringEncoding,
		NSShiftJISStringEncoding,
		NSJapaneseEUCStringEncoding,
		NSUnicodeStringEncoding
	};
	const size_t candidateCount = sizeof(candidates) / sizeof(candidates[0]);
	size_t i;
	
	for (i = 0; i < candidateCount; i++)
	{
		id decoded = [[aClass allocWithZone:zone] initWithData:data encoding:candidates[i]];
		if (decoded != nil)
		{
			if (enc != NULL) *enc = candidates[i];
			return decoded;
		}
	}
	
	// The data cannot be interpreted with any of the candidate encodings.
	if (outError != NULL) *outError = FJNInapplicableStringEncodingError();
	return nil;
}

@end
